In [1]:
import pandas as pd
from ydata_profiling import ProfileReport

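# One-time conversion step, kept commented out: it exports the 'Final data' sheet
# of the Excel workbook to the CSV file that the next cell loads.
# # Define the path to your Excel file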
# excel_file_path = r'..\Documents\All_Data_2023.xlsx'

# # Define the name of the sheet you want to load
# sheet_name_to_load = 'Final data'

# # Define the path for the output CSV file
# csv_output_path = r'..\data\final_data_2023.csv'

# try:
#     # Read the specified sheet from the Excel file into a pandas DataFrame
#     df = pd.read_excel(excel_file_path, sheet_name=sheet_name_to_load)

#     # Save the DataFrame to a CSV file
#     # index=False prevents pandas from writing the DataFrame index as a column
#     df.to_csv(csv_output_path, index=False)

#     print(f"Sheet '{sheet_name_to_load}' from '{excel_file_path}' successfully saved to '{csv_output_path}'")

# except FileNotFoundError:
#     print(f"Error: The file '{excel_file_path}' was not found.")
# except Exception as e:
#     # Catches other potential errors like the sheet name not existing
#     print(f"An error occurred: {e}")
In [2]:
# Load the 2023 dataset from its CSV export.
# Forward slashes keep this relative path valid on Windows, macOS and Linux;
# the previous backslash-separated literal only resolved on Windows.
csv_output_path = '../data/final_data_2023.csv'
df = pd.read_csv(csv_output_path)

# Preview the first rows as the cell's rich output.
df.head()
Out[2]:
Entry Entry_species Species Population Date ID Flower_No. Length_mm Width_mm Ratio_len/wid ... Flowers_total Marked Wilted Fruits_marked Fruits_notmarked Fruits_total Fruit seeds weight Avg.weight
0 1 23 I.atropurpurea NET 2023-02-26 3 1 63.4 55.0 1.152727 ... 3.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000
1 2 24 I.atropurpurea NET 2023-02-26 10 1 96.0 78.5 1.222930 ... 3.0 2.0 1.0 1.0 0.0 1.0 1.0 14.0 1008.5 72.035714
2 3 36 I.atropurpurea NET 2023-03-05 10 2 77.1 63.4 1.216088 ... NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.000000
3 4 46 I.atropurpurea NET 2023-03-05 29 1 61.0 57.2 1.066434 ... 2.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000
4 5 35 I.atropurpurea NET 2023-02-26 34 1 62.5 59.7 1.046901 ... 2.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000

5 rows × 27 columns

In [4]:
# Build a ydata-profiling report for the loaded DataFrame.
# ProfileReport is imported in the notebook's import cell at the top.
# The title is optional; it labels the rendered report.
profile = ProfileReport(df, title="Iris Data Profiling Report")

# Render the report inline as an iframe in the notebook output
# (intended for Jupyter environments).
profile.to_notebook_iframe()
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
100%|██████████| 27/27 [00:00<00:00, 774.74it/s]
Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
In [ ]: